import json

from scrapy.loader import ItemLoader

from m_y.m_y_scrapy.items import Film
import scrapy
from scrapy import FormRequest
from selenium import webdriver


def get_cookie(url):
    """Open ``url`` in a PhantomJS browser session and return its cookies.

    Args:
        url: Address of the page to load.

    Returns:
        A JSON-encoded string holding an object that maps each cookie
        name to its value.
    """
    driver = webdriver.PhantomJS()
    try:
        driver.get(url)
        cookie = {entry['name']: entry['value'] for entry in driver.get_cookies()}
    finally:
        # Always end the browser session, even when the page load fails;
        # quit() tears down the whole session rather than just the window.
        driver.quit()
    return json.dumps(cookie)


class M_ySpider(scrapy.Spider):
    """Spider that walks the maoyan.com film listing and scrapes film pages."""

    name = 'maoyan.scrapy'
    base_url = 'https://maoyan.com'
    # Index of the most recently scheduled listing page; page N is requested
    # with offset N * 30. Page 0 is the start URL itself.
    count = 0

    def start_requests(self):
        """Yield one initial listing request per start URL.

        Each request carries the cookies collected by ``get_cookie`` for
        that URL and is handled by ``parse``.
        """
        urls = ['https://maoyan.com/films?showType=2&offset=0']
        for url in urls:
            cookie = get_cookie(url)
            # Yield instead of returning from inside the loop so every URL
            # is scheduled and an empty list still produces an iterable.
            yield FormRequest(url, cookies=json.loads(cookie), callback=self.parse)

    def parse(self, response):
        """Handle a listing page.

        Yields a request for every distinct href found in the film list
        (handled by ``parse_film``) and, while fewer than four listing
        pages have been scheduled, a request for the next listing page.
        """
        film_no_set = set(response.xpath('//*[@id="app"]/div/div[2]/div[2]/dl//descendant::*//@href').getall())
        for film_no in film_no_set:
            href = self.base_url + film_no
            yield response.follow(href, self.parse_film)

        # Advance the page index before building the URL: offset 0 has
        # already been fetched via the start URL, so requesting it again
        # would only waste a round trip.
        next_page = self.count + 1
        if next_page < 4:
            self.count = next_page
            href = self.base_url + '/films/?showType=2&offset=' + str(next_page * 30)
            yield response.follow(href, self.parse)

    def parse_film(self, response):
        """Extract a ``Film`` item from a film detail page.

        Populates ``film_name``, ``film_intrduce``, ``daoyan`` and ``zhuyan``
        from fixed XPath locations and yields the loaded item.
        """
        item_loader = ItemLoader(item=Film(), response=response)
        item_loader.add_value("film_name", response.xpath('/html/body/div[3]/div/div[2]/div[1]/h1[1]/text()').get())
        item_loader.add_value('film_intrduce', response.xpath(
            '//*[@id="app"]/div/div[1]/div/div[3]/div[1]/div[1]/div[2]/span/text()').get())

        daoyan = response.xpath(
            '//*[@id="app"]/div/div[1]/div/div[3]/div[1]/div[2]/div[2]/div/div[1]/ul/li/div/a/text()').get()
        # .get() returns None when the node is missing; only strip a real
        # string so a page without this node does not raise AttributeError.
        item_loader.add_value('daoyan', daoyan.strip() if daoyan is not None else None)

        item_loader.add_xpath('zhuyan',
                              '//*[@id="app"]/div/div[1]/div/div[3]/div[1]/div[2]/div[2]/div/div[2]/ul/descendant::*/text()')
        yield item_loader.load_item()

